import pandas as pd
import re
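# Medical-device reporting to the national regulator (国家局): extract the declared residence (住所) and production address (生产地址) from the task info text.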
# Label/value separators accepted after "住所" / "生产地址": a linking word
# optionally followed by a colon, or a bare colon (full-width or ASCII).  The
# colon is folded into the linking-word branch because regex alternation is
# leftmost-first: listing "位于" ahead of "位于：" would never consume the colon
# and would leave it at the start of the captured address.
_RESIDENCE_RE = re.compile(r'住所(?:(?:位于|为)[：:]?|[：:])(.*?)(?=。|生产地址|$)')
_PRODUCTION_RE = re.compile(r'生产地址(?:(?:均为|为)[：:]?|[：:])(.*?)(?=。|$)')

# Whitespace and clause separators left dangling at the end of a capture, e.g.
# the "，" in "住所：A，生产地址：B", where the capture stops right before "生产地址".
_TRAILING_SEPARATORS_RE = re.compile(r'[\s，,；;、]+\Z')


def _clean_address(match):
    """Return the address captured by *match*, tidied up, or None if there is none."""
    if match is None:
        return None
    text = _TRAILING_SEPARATORS_RE.sub('', match.group(1)).strip()
    # An empty capture (e.g. "住所：。") carries no address; report it as missing.
    return text or None


def extract_addresses(task_info):
    """Extract the residence (住所) and production address (生产地址) from text.

    Each address is the text that follows its label and a separator
    ("位于"/"为" for the residence, "为"/"均为" for the production address,
    each optionally followed by a colon, or a bare colon) up to the next "。"
    or the end of the string.  The residence additionally stops where
    "生产地址" begins.

    Args:
        task_info: The free-text task description.  Any non-string value
            (None, NaN from an empty spreadsheet cell, numbers) is treated as
            "no information".

    Returns:
        A ``(residence, production)`` tuple.  Each element is the cleaned
        address string, or None when that address could not be found.
    """
    if not isinstance(task_info, str):
        return None, None

    residence = _clean_address(_RESIDENCE_RE.search(task_info))
    production = _clean_address(_PRODUCTION_RE.search(task_info))

    # Heuristic kept from the original logic: the text mentions a production
    # address but not in a parseable "label + separator + value" form, so it
    # is assumed to be the same place as the residence.
    if production is None and residence and "生产地址" in task_info:
        production = residence

    return residence, production

# Read the source workbook; only the 'SQL Results' sheet is used.
df = pd.read_excel('F:\\tmp002.xlsx', sheet_name='SQL Results')

# Parse every TASK_INFO cell into a (residence, production) pair and attach
# the two results as new columns.  Building one DataFrame from the list of
# tuples avoids constructing a pandas Series per row, and it still produces
# both columns when the sheet has no data rows.
address_columns = ['住所', '生产地址']
address_pairs = [extract_addresses(value) for value in df['TASK_INFO']]
df[address_columns] = pd.DataFrame(
    address_pairs, index=df.index, columns=address_columns
)

# Save the original columns plus the two extracted ones to a new workbook in
# the current working directory.
output_filename = 'extracted_addresses20250331.xlsx'
df.to_excel(output_filename, index=False)

# Bare expression: shows the output path when this is the last cell of a
# notebook / interactive session; it has no effect when run as a script.
output_filename